In [1]:
# Record the environment for reproducibility: Python version (-v), date (-n)
# and the versions of the packages listed after -p.
%reload_ext watermark
%watermark -p pandas,networkx,numpy,matplotlib -v -n
In [2]:
import csv
import networkx as nx
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
In [3]:
# Create an empty graph; nodes and edges are added to G in the following cells
G = nx.Graph()
In [4]:
# Add nodes: one at a time with add_node, or several at once with add_nodes_from
G.add_node(1)
G.add_nodes_from([2, 3])
G.add_node(4)
In [5]:
# Display the nodes added so far
G.nodes()
Out[5]:
⚠️
Note: In networkx 2.0, several methods now return iterators
For more details see: https://networkx.github.io/documentation/development/reference/migration_guide_from_1.x_to_2.0.html
In [6]:
# Add an edge connecting nodes 1 and 2
G.add_edge(1, 2)
In [7]:
# Print a short text summary of the graph.
# NOTE(review): nx.info is not available in recent networkx releases
# (removed in 3.0, I believe) -- confirm against the installed version.
print(nx.info(G))
In [8]:
# Draw the graph, labelling each node
nx.draw(G, with_labels=True)
In [9]:
# Attributes can be attached when nodes and edges are created.
# nodes: as keyword arguments, or as (node, attribute_dict) tuples
G.add_node(5, favorite_color='blue')
G.add_nodes_from([
    (6, {'favorite_color': 'red'}),
    (7, {'favorite_color': 'purple'}),
])
# edges: pass attributes as keyword arguments. A positional attribute dict is
# only accepted by networkx 1.x; keywords work on 1.x and 2.x alike.
G.add_edge(5, 6, relationship='best friends')
In [10]:
# accessing node attributes
# NOTE(review): G.node is the networkx 1.x / 2.0 spelling; newer releases
# use G.nodes[5] instead -- confirm against the installed version.
print("Node 5 attributes:", G.node[5])
# accessing edge attributes: G[u][v] is the attribute dict of edge u-v.
# G.edge only exists in networkx 1.x; subscripting the graph works on 1.x and 2.x.
print("Edge 5-6 attributes:", G[5][6])
In [11]:
# Favorite food of each node, keyed by node id
favorite_foods = dict([
    (1, 'pizza'),
    (2, 'mac and cheese'),
    (3, 'balogna sandwich'),
    (4, 'pizza'),
    (5, 'chocolate'),
    (6, 'pizza'),
    (7, 'bananas'),
])
In [12]:
# Store each node's favorite food under the 'favorite_food' node attribute.
# Keyword arguments are used because the positional order of `name` and
# `values` differs between networkx 1.x and 2.x.
nx.set_node_attributes(G, name='favorite_food', values=favorite_foods)
In [13]:
# Read back the attribute that was just set on node 4
print("Node 4's favorite food is %s" % (G.node[4]['favorite_food'],))
Data for graphs and networks comes in many different representations.
Representations:
Note: Representations are related to, but distinct from, the storage format. In our examples, we'll be loading our data from text files. You may also have network data stored as JSON, GEXF, or other formats. For more details, check the docs.
An edge list is a common way of representing a graph. This representation can be thought of as a list of tuples, where each tuple represents an edge between two of the nodes in your graph. The nodes of the graph can be inferred by taking the set of objects from all tuples.
You can infer/determine whether a graph is directed or weighted from an edge list.
In [14]:
# what does it look like? Peek at the first lines of the raw CSV with the shell's head command
!head ../data/ga_edgelist.csv
In [15]:
# Read the edge list with the stdlib csv module; every data row becomes one
# entry of `edges` (a list of the row's string fields).
# newline='' is what the csv module documentation asks for, so that quoted
# fields containing line breaks are parsed correctly.
with open('../data/ga_edgelist.csv', 'r', newline='') as f:
    filereader = csv.reader(f, delimiter=",", quotechar='"')
    next(filereader)  # skips header row
    edges = list(filereader)
In [16]:
# Preview the first five parsed rows
edges[:5]
Out[16]:
In [17]:
# Build a graph from the parsed rows; each row is treated as one edge
GA = nx.from_edgelist(edges)
In [18]:
# Summarize the graph built from the csv-parsed edge list
print(nx.info(GA))
Oftentimes the data we want to use will already have been analyzed with pandas. Reading our data into a DataFrame first saves us a bit of time writing code to open the files, because read_csv has sensible defaults around quoted characters and header rows.
In [19]:
# Load the same edge list into a DataFrame and preview the first rows
ga_edges = pd.read_csv('../data/ga_edgelist.csv')
ga_edges.head()
Out[19]:
In [20]:
# Rebuild GA from the DataFrame, using the "from" and "to" columns as edge endpoints.
# NOTE(review): later networkx releases name this function from_pandas_edgelist
# -- confirm against the installed version.
GA = nx.from_pandas_dataframe(ga_edges, source="from", target="to")
In [21]:
# validate info: this summary can be compared with the one printed for the
# graph built from the csv-parsed rows
print(nx.info(GA))
In [22]:
# Draw the edge-list graph, labelling each node
nx.draw(GA, with_labels=True)
A common way of representing graph data is through an adjacency matrix -- often referred to mathematically as $A$. This data structure is a square, $n \times n$ matrix where $n$ is the number of nodes. Each row and each column of the matrix corresponds to a node. For any two nodes $i$ and $j$, the value $A_{ij}$ (row $i$, column $j$) represents the weight of the edge between nodes $i$ and $j$.
In [23]:
# Load the adjacency matrix; the first CSV column is used as the row labels
ga_adj = pd.read_csv('../data/ga_adj.csv', index_col=0)
# Preview the top-left 5 x 5 corner by position. `.iloc` replaces `.ix`,
# which is deprecated and no longer exists in pandas 1.0+.
ga_adj.iloc[0:5, 0:5]
Out[23]:
In [24]:
# Build a graph from the raw matrix values only; the DataFrame's row/column
# labels are not passed to networkx here
GAAdj = nx.from_numpy_matrix(ga_adj.values)
In [25]:
# Numpy matrices don't have labels :(
# so the nodes printed here are not the names from the CSV header
print(GAAdj.nodes())
In [26]:
# Pair every node with the column header at the same position, then rename the nodes
label_mapping = {
    node: label
    for node, label in zip(GAAdj.nodes(), ga_adj.columns)
}
GAAdj = nx.relabel_nodes(GAAdj, label_mapping)
In [27]:
# Draw the relabelled adjacency-matrix graph with a spring layout
nx.draw_spring(GAAdj, with_labels=True)
In [28]:
# Easiest, least robust way: eyeball the two summaries side by side
for heading, graph in (
    ("Edge List Graph\n", GA),
    ("\nAdj. Matrix Graph\n", GAAdj),
):
    print(heading, nx.info(graph))
In [29]:
# Fancy math way that checks additional conditions:
# ask networkx whether the two graphs are isomorphic
print("Isomorphic?", nx.is_isomorphic(GA, GAAdj))
In [30]:
# Look up the same node in both graphs to compare what each one stores for it
for source_label, graph in (
    ("'denny' From Edge List Graph:", GA),
    ("'denny' From Adjacency Matrix Graph:", GAAdj),
):
    print(source_label, graph['denny'])
⚠️ Observation: Edge weights are inferred from adjacency matrix
In [31]:
# Compare the two graphs edge by edge after sorting their edge lists.
original_edgelist = sorted(nx.to_edgelist(GA))
adjacency_edgelist = sorted(nx.to_edgelist(GAAdj))

# Walk both lists in lockstep. zip stops at the shorter list, so a difference
# in edge counts cannot raise IndexError the way positional indexing would.
for i, (edge, adjacency_edge) in enumerate(zip(original_edgelist, adjacency_edgelist)):
    # Only the first endpoint of each edge is compared; report the first
    # position where the two sorted lists disagree and stop.
    if edge[0] != adjacency_edge[0]:
        print("Sorted Edge Mismatch at edge %s:" % i, edge, adjacency_edge)
        break
⚠️ Observation: Source and Target are ambiguously defined in undirected graphs
In [32]:
# Save the edge-list graph to disk in GEXF format
nx.write_gexf(GA, '../data/ga_graph.gexf')